from pyquery import PyQuery
from requests.exceptions import RequestException
import requests
import time,json

def getPage(url):
    '''Fetch *url* with a browser-like header set and return the body text.

    Returns the response text on HTTP 200, otherwise None (including on any
    requests-level failure, which is printed to stdout).
    '''
    try:
        # NOTE: the original header names/values contained stray spaces
        # ("Content - Length", "keep - alive", "https: // cart.jd.com", ...),
        # which makes them invalid HTTP headers.  They are normalized here.
        # Content-Length / Content-Type are omitted entirely: this is a GET
        # request with no body, and requests computes framing headers itself
        # (a hard-coded Content-Length: 160 would corrupt the request).
        headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.26 Safari/537.36 Core/1.63.5383.400 QQBrowser/10.0.1313.400',
                   'Connection': 'keep-alive',
                   'Host': 'cart.jd.com',
                   'Origin': 'https://cart.jd.com',
                   'X-Requested-With': 'XMLHttpRequest',
                   'Accept': 'application/json, text/javascript, */*; q=0.01',
                   'Accept-Encoding': 'gzip, deflate, br',
                   'Accept-Language': 'zh-CN,zh;q=0.9',
                   'Referer': 'https://cart.jd.com/cart.action',
                   # Session cookie (authenticates the cart request) — kept verbatim.
                   'Cookie':'_jrda=4; shshshfpa=127bef96-d0c1-2a25-10ce-6a574b5c4d18-1524990397; ipLocation=%u5317%u4eac; ipLoc-djd=1-2801-2827-0.560192795; PCSYCityID=1; 3AB9D23F7A4B3C9B=644APREGHRCZKTDND675NWXG3BF6BWF4HQCEBWGHC5LOMW7YUPJY34WOOGVTG3QMBDY3IJSGAQF3TX7YEAPRIJTOPI; shshshfpb=2ef28ca06392e48f5b7e4271cd7f295875ae5815b7c098821675fe0b85; mt_xid=V2_52007VwMaUFhQVlMfShhsBTQAFldcCFRGTB1KDBliChtXQVEFChlVSlUNMwERUF9fAVIfeRpdBW4fE1ZBW1VLHEgSXwRsARJiX2hRahxIH1QAYjMRVV1e; unpl=V2_ZzNtbRYEQRZzCkNRc05YUGIDQlUSB0FGJ1pGUCtJWwJmVBNYclRCFXwUR1BnGloUZAMZXUtcQhZFCHZXchBYAWcCGllyBBNNIEwHDCRSBUE3XHxcFVUWF3RaTwEoSVoAYwtBDkZUFBYhW0IAKElVVTUFR21yVEMldQl2VHMbWQ1jChJeQ2dzEkU4dlN%2fEVsCYjMTbUNnAUEpDkRWehFcSGcLEFhKU0oVdgl2VUsa; user-key=4d6b6508-3637-425d-8dcd-f54ba52caeca; pinId=NPL1fFYZgNRWED_RnhqS1LV9-x-f3wj7; pin=jd_66cb60abcb5d9; unick=%E6%88%91%E4%BA%86%E4%B8%AA%E6%88%91%E5%95%8A; _tp=jc2oJIGlfqgfWi1l78DSS7k0t%2FrLIw8zitgLEIUa%2Fxc%3D; _pst=jd_66cb60abcb5d9; __jdv=122270672|baidu-pinzhuan|t_288551095_baidupinzhuan|cpc|0f3d30c8dba7459bb52f2eb5eba8ac7d_0_eb3273449f5d41a9aa3bcc15aa660f04|1527427109102; cart-main=xx; cd=0; __jdu=1934958120; shshshfp=c89e8c85f51be4a7b06fc175180a5796; __jda=122270672.1934958120.1505571407.1527427082.1527506614.48; __jdc=122270672; wlfstk_smdl=707fonyt4eu8zrl5f9qa6ke5n95mhyko; TrackID=1KeazMUVUuAp1UnIXIc4VYY_G3V5nFgC79MGmcVqdI9FekwUBbLRQt0pmtCc0_C7s1X31TTeKrrkfoQLATzqVw5pKv9LNQ8TpALvVzY88kuMEPOax1w2BQ6kn8RopZtjJzRJEWqvZG_GUyoWVKm7Dqg; thor=B66D45B48C859ADB7D34D3C1C62597B1EC2FD94DD7092A24E9CA8E86E085747ADA5707B1FF05AA906CDE18C678470EC536C72E0BCB5C96D4CA52F9042E0C01E1E3D917FD4DB0DFFFD93F575FDF92E4C29E71719FE0EA7EB4B8C65DC1217511D259CE9FED5E250267846F063C8DA6787E43E877B76A9D305CD56835F11044FE7BF1629F815D388468B5C1C4567A8021D5BC6553C8DD2C0B24486DF70E1F1A1D92; ceshi3.com=201; cn=51; shshshsID=73a3235f7fc929893dcd13e20907b373_3_1527506658406; __jdb=122270672.5.1934958120|48.1527506614'
                   }
        # timeout so a stalled server cannot hang the crawler forever
        response = requests.get(url, headers=headers, timeout=10)
        if response.status_code == 200:
            return response.text
        return None
    except RequestException as err:
        print(err)
        return None

def parseData(content):
    '''Parse the cart-page HTML and yield one dict per product item.

    Each yielded dict carries the item's display name, product URL,
    image URL and price text, scraped from the ``div.item-form`` nodes.
    '''
    document = PyQuery(content)
    for node in document('div.item-form').items():
        yield {
            'name': node.find('div.p-name a').text(),
            'url': node.find('div.p-img a').attr('href'),
            'img': node.find('div.p-img img').attr('src'),
            'price': node.find('div.p-price strong').text(),
        }

def saveFile(content):
    '''Append *content* to ./jd.txt as one UTF-8 JSON record.

    Records are separated by a blank line; non-ASCII text (product names)
    is written verbatim rather than escaped.
    '''
    serialized = json.dumps(content, ensure_ascii=False)
    with open('./jd.txt', 'a', encoding="utf-8") as output:
        output.write(serialized + '\n\n')

def start(offset):
    '''Crawler entry point: fetch the JD cart page, parse it, save each item.

    The original placed this docstring AFTER the function body, where it was
    a dead string expression; it is now a real docstring.  *offset* is unused
    by the current implementation — the parameter is kept only so existing
    callers (``start(0)``) remain valid.
    '''
    url = "https://cart.jd.com/cart.action"
    html = getPage(url)
    if html:
        # Renamed the loop variable: the original used `dict`,
        # shadowing the builtin.
        for item in parseData(html):
            saveFile(item)


if __name__ == '__main__':
    # Script entry point: scrape the JD cart page once (offset is unused).
    start(offset=0)
